#!/usr/bin/env bash
# Configuration for the HDFS -> local image export performed by this script.

# Re-assigns PWD to the directory reported by `pwd`.
PWD=$(pwd)

# HDFS directory whose parsed* entries are listed for export.
HDFS_PATH='/user/graph_builder/data/lz-whole'
# Local root under which exported snapshots are written.
IMAGE_DUMP_PATH='/home/dig/graph_data/image_data'
CHANGSHA_JSON='/home/dig/graph_data/changsha_json'
# Hadoop installation; the client binary is invoked as ${HADOOP_HOME}/bin/hadoop.
HADOOP_HOME='/opt/cloudera/parcels/CDH/lib/hadoop'

# Create the local dump root on first use.
[[ -d "${IMAGE_DUMP_PATH}" ]] || mkdir -p "${IMAGE_DUMP_PATH}"

# Graph-build export: copy the most recent parsed image snapshots from HDFS
# into ${IMAGE_DUMP_PATH}/data_<version>, then drop a DUMPED marker next to the
# HDFS source so the snapshot is skipped on later runs.
#
# Reads:  HADOOP_HOME, HDFS_PATH, IMAGE_DUMP_PATH
# Writes: ${IMAGE_DUMP_PATH}/data (staging), ${IMAGE_DUMP_PATH}/data_<version>,
#         <snapshot>/DUMPED in HDFS
# A snapshot whose copy fails is reported on stderr and left unmarked, so the
# next run retries it instead of treating a partial export as finished.
hadoop_bin="${HADOOP_HOME}/bin/hadoop"

# The last field of each `fs -ls` line is the entry's path; keep the 10 paths
# that sort last. The glob is quoted so Hadoop, not the local shell, expands it.
mapfile -t image_paths < <(
    "${hadoop_bin}" fs -ls "${HDFS_PATH}/parsed*" \
        | grep image \
        | awk '{print $NF}' \
        | sort \
        | tail -n 10
)

for vp in "${image_paths[@]}"; do
    printf '%s\n' "${vp}"

    # Non-empty when the snapshot directory listing mentions DUMPED.
    dumped=$("${hadoop_bin}" fs -ls "${vp}" | grep DUMPED)

    # The version name is the 6th '/'-separated field of the snapshot path
    # (field 1 is the empty string before the leading slash).
    version=$(awk -F'/' '{print $6}' <<<"${vp}")
    printf '%s\n' "${version}"
    if [[ -z "${version}" ]]; then
        echo "cannot derive version from path: ${vp}; skipping" >&2
        continue
    fi

    target_dir="${IMAGE_DUMP_PATH}/data_${version}"

    # Already flagged in HDFS, or already present locally: nothing to do.
    if [[ -n "${dumped}" || -d "${target_dir}" ]]; then
        continue
    fi

    # Always start from an empty staging directory: leftovers from an
    # interrupted run would make copyToLocal fail or leak into this version.
    staging_dir="${IMAGE_DUMP_PATH:?}/data"
    rm -rf -- "${staging_dir}"
    if ! mkdir -p -- "${staging_dir}"; then
        echo "cannot create staging directory ${staging_dir}; skipping ${vp}" >&2
        continue
    fi

    if ! "${hadoop_bin}" fs -copyToLocal "${vp}" "${staging_dir}/"; then
        echo "copyToLocal failed for ${vp}; skipping" >&2
        continue
    fi

    # Concatenate the source_version part files into a single local file.
    if ! "${hadoop_bin}" fs -cat "${HDFS_PATH}/${version}/source_version/part*" \
            > "${staging_dir}/source_version"; then
        echo "reading source_version failed for ${version}; skipping" >&2
        continue
    fi

    # Publish the finished export under its versioned name.
    if ! mv -- "${staging_dir}" "${target_dir}"; then
        echo "cannot move ${staging_dir} to ${target_dir}; skipping" >&2
        continue
    fi

    # Create the zero-length marker directly in HDFS; no local scratch file
    # in the current directory is needed.
    if ! "${hadoop_bin}" fs -touchz "${vp}/DUMPED"; then
        echo "exported ${vp} but could not create its DUMPED marker" >&2
    fi
done


